from lxml import etree

# Sample HTML fragment (despite the ``xml_str`` name) kept as scratch input for
# the commented-out ``etree.HTML`` experiment that follows. The empty first and
# last entries keep a newline at both ends of the resulting string.
xml_str = "\n".join(
    (
        "",
        "<html>",
        "    <body>",
        '        <div class="content">',
        "            <h1 class = 'h1'>Welcome</h1>",
        "            <p>This is a paragraph.</p>",
        '            <a href="http://example.com">Link</a>',
        "        </div>",
        "    </body>",
        "</html>",
        "",
    )
)

# tree = etree.HTML(xml_str)
# print(tree.find())

# print(tree.attrib)
# print(tree.tag)
# print(tree.get['h1'])



# Sample HTML document containing one <ul> and one <ol>, each with three empty
# <li> items; scratch input for the commented-out ``cssselect`` experiment that
# follows. The empty first and last entries keep a newline at both ends of the
# resulting string, and the empty entry in the middle is the blank line between
# the two lists.
text = "\n".join(
    (
        "",
        "<!DOCTYPE html>",
        '<html lang="en">',
        "<head>",
        '    <meta charset="UTF-8">',
        "    <title>Title</title>",
        "</head>",
        "<body>",
        '<ul class="ul">',
        '    <li id="1" class="1"></li>',
        '    <li id="2" class="2"></li>',
        '    <li id="3" class="3"></li>',
        "</ul>",
        "",
        '<ol class="ol0">',
        '    <li id="01" class="01"></li>',
        '    <li id="02" class="02"></li>',
        '    <li id="03" class="03"></li>',
        "</ol>",
        "</body>",
        "</html>",
        "",
    )
)

# tree = etree.HTML(text)
# # print(tree)
#
# titles = tree.cssselect('ul li')
# print(titles)
#
from lxml import etree



# tree = etree.parse('ee.html', parser=etree.HTMLParser())
# print(type(tree))

# with open("ee.html", "r") as f:
#     tree = etree.parse(f, parser=etree.HTMLParser())
#     print(type(tree))

# text = """
# <!DOCTYPE html>
# <html lang="en">
# <head>
#     <meta charset="UTF-8">
#     <title>Title</title>
# </head>
# <body>
# <ul>
#     <li id="li1">li1</li>
#     <li id="li2">li2</li>
#     <li id="li3">li3</li>
# </ul>
# </body>
# </html>
# """
#
#
# # The default parser is XMLParser, so an HTMLParser is passed explicitly
# tree = etree.fromstring(text, parser=etree.HTMLParser())
# print(type(tree))



#
# hxml_str = """
# <!DOCTYPE html>
# <html lang="en">
# <head>
#     <meta charset="UTF-8">
#     <title>Title</title>
# </head>
# <body>
# <ul>
#     <li id="li1">li1</li>
#     <li id="li2">li2</li>
#     <li id="li3">li3</li>
# </ul>
# </body>
# </html>
# """
#
# tree = etree.HTML(hxml_str)
# # print(type(tree))
# # print(tree.tag, tree.attrib, )
#
# title = tree.find('head/title')
# print(type(title))
# lis = tree.findall('body/ul/li')
# for li in lis:
#     print(type(li))







# xml_str = """
# <bookstore id='a101' title='abc'>
#   hello world
# </bookstore>
# """
#
# print(dir(etree))
# tree = etree.XML(xml_str)
# print(tree.tag, tree.text, tree.attrib)



# xml_str = """
# <bookstore>
#   <book category="cooking1">
#     <title lang="en">Everyday Italian1</title>
#     <author>Giada De Laurentiis1</author>
#     <year>20051</year>
#     <price>30.00</price>
#   </book>
#   <book category="cooking2">
#     <title lang="en">Everyday Italian2</title>
#     <author>Giada De Laurentiis2</author>
#     <year>20052</year>
#     <price>60.00</price>
#   </book>
# </bookstore>
# """

# tree = etree.XML(xml_str)
# print(dir(tree))
#
# book = tree.find('book')
# print(book.attrib)
#
#
# books = tree.findall('book')
# for b in books:
#     print(type(b))
#
# title = tree.find('book/title')
# year = tree.find('book/year')
# print(title, year)
#
#


